* [] brackets indicate redacted internal census variable name or directory that can't be disclosed. ;

* define libraries;
libname hannah  '/projects/users/########';
libname ben '/projects/users/########';

* get county centroids;
* projections are slightly different so use two sources just in case;
data work.counties;
   set maps.county;
   where state in (01 04 05 06 08 10 11 17 18 19 20 23 24 29 30 31 32 35 38 40 47 51 56);  /* FIPS codes for our states */
   format state z2. county z3.;
   state2 = put(state,z2. -L);
   county2 = put(county,z3. -L);
   ctyfips = cats(state2,county2);
run;
%CENTROID(work.counties,work.countycenters,ctyfips);
data work.countycenters;
set work.countycenters;
lat = y*45/atan(1);
long = -x*45/atan(1);
y1 = lat;
x1 = long;
drop lat long x y;
run;

data test;
  set mapsgfk.us_counties (where=(state in (01 04 05 06 08 10 11 17 18 19 20 23 24 29 30 31 32 35 38 40 47 51 56)));
  x = long;
  y = lat;
run;
%CENTROID(work.test,work.countycenters2,ID);
data work.countycenters2;
set work.countycenters2;
ctyfips = substr(ID,4,5);
run;

data work.countycenters;
  merge work.countycenters work.countycenters2 (keep = x y ctyfips);
  by ctyfips;
run;

* collapse ehf_icf_states_plants (worker data) to county-year level; 
data hannah.ehf_icf_states_plants;
set hannah.ehf_icf_states_plants;
ctyfips = substr([geocode],1,5);
yc = [latitude]/1000000;
xc = [longitude]/1000000;
run;

proc sql;
  CREATE TABLE hannah.ehf_icf_all_county_annual AS
  SELECT [county id], year,
  SUM([annual earnings]) as total_earn,
  MEAN(age) as mean_age,
  COUNT(*) as total_wrkrs,
  SUM(unemp) as total_unemp_wrkrs,
  MEAN(female) as mean_female,
  MEAN(bornusa) as mean_bornusa,
  MEAN(hispanic) as mean_hisp,
  MEAN(white) as mean_white,
  MEAN(black) as mean_black,
  MEAN(nohigh) as mean_nohigh,
  MEAN(highsch) as mean_highsch,
  MEAN(somecoll) as mean_somecoll,
  MEAN(college) as mean_college,
	SUM(case when white=1 then [annual earnings] else 0 end) as tot_white_earn,
	SUM(case when white=0 then [annual earnings] else 0 end) as tot_nonwhite_earn,
	SUM(case when white=1 then 1 else 0 end) as tot_white_wrkrs,
	SUM(case when white=1 then unemp else 0 end) as tot_white_unemp,
	SUM(case when white=0 then 1 else 0 end) as tot_nonwhite_wrkrs,
	SUM(case when white=0 then unemp else 0 end) as tot_nonwhite_unemp,
  MEAN(x) as meanx,
  MEAN(y) as meany
  FROM hannah.ehf_icf_states_plants
  GROUP BY [county id], year
  ORDER BY [county id], year;
quit;

data hannah.ehf_icf_all_county_annual;
set hannah.ehf_icf_all_county_annual;
ctyfips = [county id];
run;

data hannah.ehf_icf_all_county_annual;
  merge hannah.ehf_icf_all_county_annual work.countycenters;
  by ctyfips;
run;

/*
* collapse wind plant data to county-annual;
proc sort data=ben.plants;
by t_fips p_year;
run;

proc import out = ben.plants_csm 
  datafile = "/projects/users/########/PlantLevelWindDatabaseCSM.dta"
  dbms=dta
  replace;
run;

data ben.plants_csm;
  set ben.plants_csm;
  stfips = substr(t_fips,1,2);
run;

proc sql;
  create table hannah.countywind as
  select t_fips, p_year,
  sum(p_tnum) as new_p_tnum,
  sum(p_cap) as new_p_cap,
  max(t_state) as t_state
  from ben.plants_csm
  group by t_fips, p_year
  order by t_fips, p_year;
quit;
*/
